XML - Gaston Sanchez
library("XML")
Gaston Sanchez - Webdaten bekommen
| Function | Description |
|---|---|
| xmlName() | name of the node |
| xmlSize() | number of subnodes |
| xmlAttrs() | named character vector of all attributes |
| xmlGetAttr() | value of a single attribute |
| xmlValue() | contents of a leaf node |
| xmlParent() | parent node |
| xmlAncestors() | ancestor nodes |
| getSibling() | siblings to the right or to the left |
| xmlNamespace() | the namespace (if there’s one) |
Administrative Grenzen für Deutschland
# OSM API endpoint for relation 62422
# (presumably Berlin, given the variable name BE -- confirm).
url <- "https://api.openstreetmap.org/api/0.6/relation/62422"
# xmlParse() reads URLs through libxml2, which handles http/ftp but not
# https, so the document is parsed from a local copy. The copy is only
# downloaded when it is missing, and BE is assigned exactly once.
osm_file <- "../data/62422.xml"
if (!file.exists(osm_file)) {
  dir.create(dirname(osm_file), showWarnings = FALSE, recursive = TRUE)
  download.file(url, destfile = osm_file, mode = "wb")
}
BE <- xmlParse(osm_file)
Administrative Grenzen Berlin
# Root element of the parsed document BE.
xmltop <- xmlRoot(BE)
# Class vector of the root node object.
class(xmltop)
## [1] "XMLInternalElementNode" "XMLInternalNode"
## [3] "XMLAbstractNode"
# Number of subnodes directly below the root ...
xmlSize(xmltop)
## [1] 1
# ... and below the root's first (and only) child.
xmlSize(xmltop[[1]])
## [1] 337
XPath, the XML Path Language, is a query language for selecting nodes from an XML document.
# XPath query: every <tag> element whose k attribute equals "population".
xpathApply(BE, "//tag[@k = 'population']")
## [[1]]
## <tag k="population" v="3440441"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
# Same pattern for the tag that names the source of the population figure.
xpathApply(BE, "//tag[@k = 'source:population']")
## [[1]]
## <tag k="source:population" v="http://www.statistik-berlin-brandenburg.de/Publikationen/Stat_Berichte/2010/SB_A1-1_A2-4_q01-10_BE.pdf 2010-10-01"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
# Select the <tag> element whose k attribute equals "name:ta".
# NOTE(review): the <U+XXXX> sequences in the recorded output look like
# escaped non-ASCII code points as printed in that session -- confirm.
xpathApply(BE, "//tag[@k = 'name:ta']")
## [[1]]
## <tag k="name:ta" v="<U+0BAA><U+0BC6><U+0BB0><U+0BCD><U+0BB2><U+0BBF><U+0BA9><U+0BCD>"/>
##
## attr(,"class")
## [1] "XMLNodeSet"
# Node set of <tag> elements whose k attribute is "geographical_region".
region <- xpathApply(BE, "//tag[@k = 'geographical_region']")
# regular expressions
# First match; its v attribute holds several names separated by ";".
region[[1]]
## <tag k="geographical_region" v="Barnim;Berliner Urstromtal;Teltow;Nauener Platte"/>
<tag k="geographical_region"
v="Barnim;Berliner Urstromtal;
Teltow;Nauener Platte"/>
Barnim
# OSM node 25113879: parse the API response directly from the URL.
url2 <- "http://api.openstreetmap.org/api/0.6/node/25113879"
obj2 <- xmlParse(url2)
# Keep the first <tag> whose key is "amenity", then print it.
obj_amenity <- xpathApply(obj2, "//tag[@k = 'amenity']")[[1]]
obj_amenity
## <tag k="amenity" v="university"/>
# First match for further keys of the same node.
xpathApply(obj2, "//tag[@k = 'wikipedia']")[[1]]
## <tag k="wikipedia" v="de:Universität Mannheim"/>
# No output was recorded for the following two lookups.
xpathApply(obj2, "//tag[@k = 'wheelchair']")[[1]]
xpathApply(obj2, "//tag[@k = 'name']")[[1]]
# OSM node 303550876: parse the API response and show its opening hours tag.
url3 <- "http://api.openstreetmap.org/api/0.6/node/303550876"
obj3 <- xmlParse(url3)
xpathApply(obj3, "//tag[@k = 'opening_hours']")[[1]]
## <tag k="opening_hours" v="Mo-Sa 09:00-20:00; Su,PH off"/>
# OSM node 25439439: parse the API response and show its station category tag.
url4 <- "http://api.openstreetmap.org/api/0.6/node/25439439"
obj4 <- xmlParse(url4)
xpathApply(obj4, "//tag[@k = 'railway:station_category']")[[1]]
## <tag k="railway:station_category" v="2"/>
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
##
## xml
# Fetch the German Wikipedia article on station categories.
bhfkat <- read_html("https://de.wikipedia.org/wiki/Bahnhofskategorie")
# Convert the second <table> node of the page into a data frame.
df_html_bhfkat <- html_table(html_nodes(bhfkat, "table")[[2]], fill = TRUE)
| Stufe | Bahnsteigkanten | Bahnsteiglänge[Anm 1] | Reisende/Tag | Zughalte/Tag | Service[Anm 2] | Stufenfreiheit[Anm 3] |
|---|---|---|---|---|---|---|
| (0) | — | — | — | — | Nein | Nein |
| 1 | 01 | > 000 bis 090 m | 00.000 bis 00.049 | 000 bis 0010 | Ja | Ja |
| 2 | 02 | > 090 bis 140 m | 00.050 bis 00.299 | 011 bis 0050 | — | — |
| 3 | 03 bis 04 | > 140 bis 170 m | 00.300 bis 0.0999 | 051 bis 0100 | — | — |
| 4 | 05 bis 09 | > 170 bis 210 m | 01.000 bis 09.999 | 101 bis 0500 | — | — |
| 5 | 10 bis 14 | > 210 bis 280 m | 10.000 bis 49.999 | 501 bis 1000 | — | — |
| 6 | 00i ab 15 | > 280 m bis 000 | 000000 ab 50.000 | 000i ab 1001 | — | — |
| Gewichtung | 20 % | 20 % | 20 % | 20 % | 15 % | 5 % |
# OSM way 162149882: parse the API response and look up individual tags.
url5 <- "http://api.openstreetmap.org/api/0.6/way/162149882"
obj5 <- xmlParse(url5)
# Each lookup prints the first <tag> element matching the given key.
xpathApply(obj5, "//tag[@k = 'name']")[[1]]
## <tag k="name" v="City-Airport Mannheim"/>
xpathApply(obj5, "//tag[@k = 'website']")[[1]]
## <tag k="website" v="http://www.flugplatz-mannheim.de/"/>
xpathApply(obj5, "//tag[@k = 'iata']")[[1]]
## <tag k="iata" v="MHG"/>
Deborah Nolan - Extracting data from XML
Duncan Temple Lang - A Short Introduction to the XML package for R
# Print the citation entry for the XML package (recorded output follows).
citation("XML")
##
## To cite package 'XML' in publications use:
##
## Duncan Temple Lang and the CRAN Team (2018). XML: Tools for
## Parsing and Generating XML Within R and S-Plus. R package
## version 3.98-1.11. https://CRAN.R-project.org/package=XML
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {XML: Tools for Parsing and Generating XML Within R and S-Plus},
## author = {Duncan Temple Lang and the CRAN Team},
## year = {2018},
## note = {R package version 3.98-1.11},
## url = {https://CRAN.R-project.org/package=XML},
## }
##
## ATTENTION: This citation information has been auto-generated from
## the package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
# Print the citation entry for the xml2 package (recorded output follows).
citation("xml2")
##
## To cite package 'xml2' in publications use:
##
## Hadley Wickham, James Hester and Jeroen Ooms (2018). xml2: Parse
## XML. R package version 1.2.0.
## https://CRAN.R-project.org/package=xml2
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {xml2: Parse XML},
## author = {Hadley Wickham and James Hester and Jeroen Ooms},
## year = {2018},
## note = {R package version 1.2.0},
## url = {https://CRAN.R-project.org/package=xml2},
## }